########## DEPARTMENT-LEVEL ANALYSES ##########


library(data.table)
library(tidyverse)

library(readxl)
library(stringr)
library(foreign)
library(readstata13)

library(lfe)
library(did)
library(HonestDiD)

library(RColorBrewer)
library(stargazer)
library(xtable)

library(sf)


rm(list=ls())
gc()
graphics.off()

#Set working directory
setwd("..")


##### FIGURE A1 - The decline of direct taxation ####

dat <- read_excel("./data/budget-1800-1880.xlsx", skip=1, col_names=T) %>% as.data.table
dat <- dat[,year:landtax]
dat[year=="an 9", year:=1801] #1800/1801
dat[year=="an 13", year:=1805]
dat <- dat[year!="an 10",] #numbers for an 10 are imprecise

dat[,year:=as.numeric(year)]

dat[,sharedir:=direct/totalordinaryreceipts*100]
dat[,shareind:=indirect/totalordinaryreceipts*100]
dat[,shareland:=landtax/totalordinaryreceipts*100]

pdf("./figures/sharetaxesrevenue18001880.pdf", height=5)

plot(dat[,c("year","shareind")], ylim=c(0,100), type='o',pch=2, ylab="% ordinary public revenue")
lines(dat[,c("year","sharedir")], type='o', lty=2,pch=3)
lines(dat[,c("year","shareland")], type='o', lty=3,pch=4)
legend("topright",  c("indirect taxes","direct taxes","land tax"), lty=c(1,2,3), pch=c(2,3,3))

dev.off()


######### TABLE A.2 Summary statistics - department #########

load("./data/data_dep.RData")

dt[,firstyearcad:=min(ifelse(areacadshare>0,year,9999),na.rm=T), keyby=code_dept]
dt[firstyearcad<1807, firstyearcad:=NA]
dt[,rebel18001806:=sum(ifelse(year %in% 1800:1806,rebel,0)), keyby=c("code_dept")]

dt <- dt[year==1808,]

vars <- c("firstyearcad","popdepint","shareurbanpopintdep","ncomsdep","areadep","contfoncint","frais","newrecev","recevexp","distpref","distspref","distbrig","distroads","distriver","distforest","altmax","rugged","wheat","polsoc","rebel1700s","rebel18001806","nbank")

tab <- data.frame(var=vars, mean=NA, median=NA,min=NA,max=NA,sd=NA)
for(i in 1:length(vars)){
  tab[i,-1] <- sapply(c("mean","median","min","max","sd"), do.call, args = list(dt[year==1808 & !is.na(get(vars[i])),get(vars[i])]))
}
tab <- as.data.table(tab)
tab <- t(tab) %>% as.data.table
names(tab) <- tab[1,] %>% as.character
tab <- tab[-1,]
tab <- tab[,lapply(.SD,as.numeric),.SDcols = firstyearcad:nbank]

tab[,`First cadaster year`:=round(firstyearcad,0)]
tab[,`Dept population`:=round(popdepint,0)]
tab[,`Share urban`:=round(shareurbanpopintdep*100,2)]
tab[,`Dept area`:=round(areadep,0)]
tab[,`N communes`:=round(ncomsdep,0)]
tab[,`Land tax`:=round(contfoncint,0)]
tab[,`Tax coercion spending`:=round(frais,0)]
tab[,`New tax collector`:=round(newrecev,2)]
tab[,`Tax collector tenure`:=round(recevexp,0)]
tab[,`Dist. dept capital`:=round(distpref,0)]
tab[,`Dist. arrondt capital`:=round(distspref,0)]
tab[,`Dist. brigade`:=round(distbrig,0)]
tab[,`Dist. roads`:=round(distroads,0)]
tab[,`Dist. river`:=round(distriver,0)]
tab[,`Dist. forest`:=round(distforest,0)]
tab[,`Altitude`:=round(altmax,0)]
tab[,`Ruggedness`:=round(rugged,0)]
tab[,`Wheat agricultural suitability`:=round(wheat,0)]
tab[,`Political societies (1789-1794)`:=round(polsoc,0)]
tab[,`Social conflict 1700s`:=round(rebel1700s,2)]
tab[,`Rebellions 1800-1806`:=round(rebel18001806,2)]
tab[,`N banks`:=round(nbank,2)]

out <- tab[,lapply(.SD, as.character),.SDcols=`First cadaster year`:`N banks`]
out <- t(out) %>% as.data.frame
names(out) <- c("Mean","Median","Min","Max","Std. Dev.")

N <- lapply(dt[year==1808,..vars], FUN=function(x){length(x[!is.na(x)])}) %>% unlist %>% as.vector

out <- cbind(N, out)

print(xtable(out),
      include.rownames = T, 
      include.colnames = T,
      table.placement = 't!',
      type = "latex",
      only.contents = T,
      booktabs = F,
      sanitize.text.function = identity,
      file = "tables/summary_stats_dep.tex")

### FIGURE 4: Map: Per capita land tax in 1810 and 1880 and estimated tax burden ####

load("./data/maps_cadaster.RData")
load("./data/data_dep2.RData")
dt <- read.csv("./data/data_dep.csv") %>% as.data.table

deptf <- merge(deptf, unique(dt[,c("code_dept","newtax2")]), by="code_dept")
deptf <- merge(deptf, dt[year==1810, c("code_dept","contfoncint","contfoncpop")], by="code_dept")
deptf <- merge(deptf, dt[year==1880, c("code_dept","contfoncint","contfoncpop")], by="code_dept", suffixes=c("1810","1880"))

reds <- brewer.pal(5, 'Reds')
blues<- brewer.pal(5, 'Blues')

ggplot() + geom_sf(data=deptf, aes(fill=newtax2))+labs(fill="Estimated\neffective\n tax rate") + scale_fill_gradientn(colours=blues) + theme_void()
ggsave("./figures/map_newtax.jpg", width=7, height=7)

ggplot() + geom_sf(data=deptf, aes(fill=contfoncpop1810))+labs(fill="Land tax\nper capita\nin 1810") + scale_fill_gradientn(colours=reds, limits=c(2,14))+ theme_void()
ggsave("./figures/map_contfoncpop1810.jpg", width=7, height=7)

ggplot() + geom_sf(data=deptf, aes(fill=contfoncpop1880))+labs(fill="Land tax\nper capita\nin 1880")+ scale_fill_gradientn(colours=reds, limits=c(2,14)) + theme_void()
ggsave("./figures/map_contfoncpop1880.jpg", width=7, height=7)

### DID functions  #######

#Callaway Sant'Anna 2021
csdidcoefs <- function(outcome="lcontfoncint", treatvar,d=dt, cg=c("nevertreated", "notyettreated"), adj=F){
  
  d <- d[!is.na(get(outcome)) & !is.na(get(treatvar)),]
  d[,treat:=min(ifelse(get(treatvar)==1,year,NA),na.rm=T), keyby="code_dept"]
  d[is.infinite(treat) , treat:=0]
  
  if(adj==T){form <- as.formula("~ lshareurban+lrugged+lwheat+ldistparis+ldistpref+ldistroads+rebel+lnbank")}
  if(adj==F){form <- as.formula("~1")}
  
  out <- att_gt(yname=outcome, 
                tname="year",
                idname="code_dept",
                gname="treat",
                control_group = cg,
                clustervars = "code_dept",
                xformla = form,
                base_period = "universal",
                data=d)
  outcoefs <- aggte(out, type="dynamic", min_e = -20, max_e=30, na.rm=T) 
  return(outcoefs)
}

plotcsdidcoefs <- function(out, col=1, main="", add=F, ylim=c(-.15, .15), leadlag = -15:25){
  out <- as.data.frame(out[c("egt","att.egt","se.egt")])
  out <- out[out$egt %in% leadlag,]
  if(add==F){
    plot(out$egt, out$att.egt, type='o', lwd=2, col=col, ylim=ylim,  ylab="Estimate", xlab="year pre/post", main=main)
  }
  if(add==T){
    points(out$egt, out$att.egt, col=col, lwd=2, type='o')
  }
  segments(out$egt, out$att.egt-1.96*out$se.egt, out$egt, out$att.egt+1.96*out$se.egt, col=col)
}

#Chaisemartin & D'Haultfoeuille 2020 and 2024
ploteventstudy_cdh_cont <- function(files,mains=c(""),ylab="Estimate", xlab="", ylim=NULL, add=F){
  for(i in 1:length(files)){
    if(is.null(ylab)){ylab <- files[i]}
    out <- read.dta13(paste("./stata outputs/",files[i],".dta",sep="")) %>% as.data.table
    out <- out[out$time_to_treatment %in% -3:15,]
    out$time_to_treatment <- 5*out$time_to_treatment
    
    if(is.null(ylim)){
      min <- min(out$treatment_effect_lower_95CI)
      max <- max(out$treatment_effect_upper_95CI)
      ylim <- c(min,max)
    }
    if(add==F){
      plot(out[,c("time_to_treatment","treatment_effect")], ylim=ylim, main=mains[i], xlab=xlab[i], ylab=ylab,  type='o', lwd=2)
      segments(out$time_to_treatment,out$treatment_effect_lower_95CI,out$time_to_treatment, out$treatment_effect_upper_95CI)
      abline(h=0, v=-5, lty=2)
    }
    if(add==T){
      lines(out[,c("time_to_treatment","treatment_effect")], ylim=ylim, main=mains[i], xlab=xlab[i], ylab=ylab,  type='o', col=2, lwd=2)
      segments(out$time_to_treatment,out$treatment_effect_lower_95CI,out$time_to_treatment, out$treatment_effect_upper_95CI, col=2)
      abline(h=0, v=-5, lty=2)
    }
  }
}
ploteventstudy_cdh_bin <- function(files,mains=c(""),ylab="Estimate", xlab="", ylim=NULL, add=F){
  for(i in 1:length(files)){
    if(is.null(ylab)){ylab <- files[i]}
    out <- read.dta13(paste("./stata outputs/",files[i],".dta",sep="")) %>% as.data.table
    out <- out[out$time_to_treat %in% -5:6,]
    out$time_to_treat <- out$time_to_treat-1
    out$time_to_treat <- 5*out$time_to_treat #5-year periods
    
    if(is.null(ylim)){
      min <- min(out$lb_CI_95)
      max <- max(out$up_CI_95)
      ylim <- c(min,max)
    }
    if(add==F){
      plot(out[,c("time_to_treat","point_estimate")], ylim=ylim, main=mains[i], xlab=xlab[i], ylab=ylab,  type='o', lwd=2)
      segments(out$time_to_treat,out$lb_CI_95,out$time_to_treat, out$up_CI_95)
    }
    #abline(h=0, v=0, lty=2)
    if(add==T){
      lines(out[,c("time_to_treat","point_estimate")], col=2, type='o', lwd=2)
      segments(out$time_to_treat,out$lb_CI_95,out$time_to_treat, out$up_CI_95,col=2)
    }
  }
}

honest_did <- function(es,
                       e = 0,
                       type = c("smoothness", "relative_magnitude"),
                       gridPoints = 100,
                       ...) {
  
  type <- match.arg(type)
  
  # Make sure that user is passing in an event study
  if (es$type != "dynamic") {
    stop("need to pass in an event study")
  }
  
  # Check if used universal base period and warn otherwise
  if (es$DIDparams$base_period != "universal") {
    stop("Use a universal base period for honest_did")
  }
  
  # Recover influence function for event study estimates
  es_inf_func <- es$inf.function$dynamic.inf.func.e
  
  # Recover variance-covariance matrix
  n <- nrow(es_inf_func)
  V <- t(es_inf_func) %*% es_inf_func / n / n
  
  # Remove the coefficient normalized to zero
  referencePeriodIndex <- which(es$egt == -5)
  V    <- V[-referencePeriodIndex,-referencePeriodIndex]
  beta <- es$att.egt[-referencePeriodIndex]
  
  nperiods <- nrow(V)
  npre     <- sum(1*(es$egt < -5))
  npost    <- nperiods - npre
  baseVec1 <- basisVector(index=(e+1),size=npost)
  orig_ci  <- constructOriginalCS(betahat        = beta,
                                  sigma          = V,
                                  numPrePeriods  = npre,
                                  numPostPeriods = npost,
                                  l_vec          = baseVec1)
  
  if (type=="relative_magnitude") {
    robust_ci <- createSensitivityResults_relativeMagnitudes(betahat        = beta,
                                                             sigma          = V,
                                                             numPrePeriods  = npre,
                                                             numPostPeriods = npost,
                                                             l_vec          = baseVec1,
                                                             gridPoints     = gridPoints,
                                                             ...)
    
  } else if (type == "smoothness") {
    robust_ci <- createSensitivityResults(betahat        = beta,
                                          sigma          = V,
                                          numPrePeriods  = npre,
                                          numPostPeriods = npost,
                                          l_vec          = baseVec1,
                                          ...)
  }
  
  return(list(robust_ci=robust_ci, orig_ci=orig_ci, type=type))
}

#TWFE 
twfe <- function(depvar="lcontfoncint", treatvar="areacadshare", covs=NULL, dta=dt, range=1795:1850){
  dta <- dta[year %in% range,]
  form <- paste(depvar,"~",treatvar, covs, "| code_dept + year + (ldistforest+ldistpref+ldistroads+ldistparis+lrugged+lwheat):factor(year) | 0 | code_dept")
  felm(as.formula(form), data=dta) %>% summary %>% .$coefficients  -> f
  out <- data.table(coef=f[treatvar,"Estimate"], cse=f[treatvar,"Cluster s.e."]) 
  return(out)
}

##### Data ######

#Quinquennial data
load("./data/data_dep.RData")
dt <- dt[year/5==round(year/5),]

#TABLE 3 - Cadastral information on surveyed land at the end of the Empire

dt[year==1815,c("areacadshare","oldrevpp","newrevpp","diffrev","newtax3")] %>% 
  rename(`Share surveyed area in 1815`=areacadshare, `Pre-cadaster land tax base p.c.`=oldrevpp, 
         `Cadastral land tax base p.c.`=newrevpp, `Per cent tax base difference`=diffrev, 
         `Average effective land tax`=newtax3) %>% 
  stargazer(., type="text", digits=2, float=F, out="./tables/descriptives_infocapacity1821.tex") 

#FIGURE 6 - Over/undertaxed departments and land tax allocation after 1815 ####

#Left panel

plotintervar <- function(intervar="newtax",dta=dt, ylims=c(-.05,.05)){
  
  form <- paste("lcontfoncint ~  scale(",intervar,")*fyears + lpop + lshareurban + lnbatciv +  rebel | code_dept  + fyears + (lrugged+lwheat+ldistroads+ldistpref+ldistparis+maxcadcent+diffrev):fyears | 0 | code_dept",sep="")
  
  felm(as.formula(form), dta[year<1855,]) %>% summary -> f
  
  coefs <- f$coefficients[grep(paste(intervar,".+fyears",sep=""),rownames(f$coefficients)),]
  xs <- str_extract(rownames(coefs),"[0-9]+") %>% as.numeric
  xs <- c(xs,1815) 
  coefs <- rbind(coefs,0)
  
  plot(xs,coefs[,1], ylim=ylims, ylab="Estimate", xlab="year")
  segments(xs,coefs[,1]+coefs[,2]*1.96,xs,coefs[,1]-coefs[,2]*1.96)
  abline(h=0, lty=2)
  
}

pdf("./figures/plot_newtax_beforeafter1815.pdf")
plotintervar("newtax", ylim=c(-.08,.05)) 
abline(v=1815, lty=2)
dev.off()

#Right panel
pdf("./figures/plotyear_landtax_bynewtax.pdf")

plot(dt[newtax>= median(dt$newtax) & year<1900, mean(contfoncint/popdepint,na.rm=T), keyby=year], type='o', lty=2, ylim=c(3,8.5), ylab="Land tax/hab.")
dt[newtax< median(dt$newtax) & year<1900, mean(contfoncint/popdepint,na.rm=T), keyby=year] %>% lines(type='o')
abline(v=1815, lty=3)
legend('topright', c("Overtaxed","Undertaxed"), lty=2:1)

dev.off()


### FIGURE 7 : Cadaster and fiscal extraction: TWFE and event study approach ####

#DID Chaisemartin & D'Haultfoeuille 2020 with continuous treatment
pdf("./figures/plot_cdh_cadcent_lcontfoncpop.pdf")
files <- c("lcontfoncpop_treatcent_nocont","lcontfoncpop_treatcent")
par(mfrow=c(1,1))
ploteventstudy_cdh_cont(c(files[1]), ylim=c(-.15,.15), main="DID: De Chaisemartin and d'Haultfoeuille (2020)")
ploteventstudy_cdh_cont(c(files[2]), add=T)
legend('topright',c("No controls","Controls"), pch=1, col=c(1,2))
dev.off()


##DID Chaisemartin & D'Haultfoeuille 2024 with binary treatment ##
pdf("./figures/plot_cdh_cadcentmed_lcontfoncpop.pdf")
files <- c("lcontfoncpop_treatcentmed_nocont","lcontfoncpop_treatcentmed")
par(mfrow=c(1,1))
ploteventstudy_cdh_bin(c(files[1]), ylim=c(-.15,.15), add=F, main="DID: De Chaisemartin and d'Haultfoeuille (2024)")
ploteventstudy_cdh_bin(c(files[2]), add=T)
legend('topright', c("No controls","Controls"), pch=1, col=c(1,2))
abline(h=0, v=-5, lty=2)
dev.off()


#Quinquennial data
load("./data/data_dep.RData")
dt <- dt[year/5==round(year/5),]

#Callaway Sant'Anna 2021 with binary treatment 
cutoffs <- seq(.2,.3,.05)
outs <- list()
outsadj <- list()
for(i in 1:length(cutoffs)){
  dt[,paste("cadcent",cutoffs[i],sep=""):=ifelse(areacadcentshare>cutoffs[i],1,0)]
  cg <- c("notyettreated")
  if(cutoffs[i]>.2){cg<-c("nevertreated","notyettreated")}
  csdidcoefs(outcome="lcontfoncpop", treatvar=paste("cadcent",cutoffs[i],sep=""), d=dt[year<1850,], cg=cg) -> outs[[i]]
  csdidcoefs(outcome="lcontfoncpop", treatvar=paste("cadcent",cutoffs[i],sep=""), d=dt[year<1850,], cg=cg, adj=T) -> outsadj[[i]]
}

res_cs_lcontfoncpop_nocont <- outs
res_cs_lcontfoncpop <- outsadj

pdf("./figures/plot_cs_cadcent_lcontfoncpop_cutoffs.pdf")

par(mfrow=c(2,2))
for(i in 1:length(cutoffs)){
  plotcsdidcoefs(outs[[i]], main=paste("% centralized cadaster > ", cutoffs[i]*100, sep=""), col=1, add=F)
  plotcsdidcoefs(outsadj[[i]], main=paste("% centralized cadaster > ", cutoffs[i]*100, sep=""), col=2, add=T)
  
  abline(h=0, lty=2, col='gray')
  abline(v=-5, lty=2, col='gray')
}
par(mfrow=c(1,1))
i <- 3
plotcsdidcoefs(outs[[i]], col=1, add=F,main="DID: Callaway and Sant'Anna (2021)")
plotcsdidcoefs(outsadj[[i]], main="", col=2, add=T)
legend('topright', c("No controls","Controls"), pch=1, col=1:2)

abline(h=0, lty=2, col='gray')
abline(v=-5, lty=2, col='gray')

dev.off()

## TWFE Land tax per capita
pdf("./figures/twfe_landtaxpop.pdf", height=5, width=5)

out <- lapply(c("areacadshare","areacadcentshare","areacadlocshare"), twfe, depvar="lcontfoncpop",  range=1795:1850, dta=dt, covs="+rebel+lnbank") %>% rbindlist 
xs <- 1:nrow(out)

plot(NA, ylim=c(-.8,.5), ylab="Impact of % area cadastered", xlab="", xaxt="n", pch=16:18, main="TWFE", xlim=c(.5,nrow(out)+.5), cex=1.5)
abline(h=seq(-1,1,.1),col='gray', lty=3)
points(xs, out$coef, ylim=c(-1,1), ylab="Impact of % area cadastered", pch=16:18)
segments(xs, out$coef-1.96*out$cse,xs, out$coef+1.96*out$cse)
abline(h=0, lty=2)
legend("topright", pch=16:18, c("All cad.","Centralized cad.","Decentralized cad."), bg='white')

dev.off()

### FIGURE A2: TWFE Cadaster and fiscal revenue from land ####


pdf("./figures/twfe_landtax.pdf", height=5)

out <- lapply(c("areacadshare","areacadcentshare","areacadlocshare"), twfe, depvar="lcontfoncint",  range=1795:1850, dta=dt, covs=NULL) %>% rbindlist
xs <- 1:nrow(out)

plot(NA, ylim=c(-.5,.5), ylab="Impact of % area cadastered", xlab="", xaxt="n", pch=16:18, main="TWFE", xlim=c(.5,nrow(out)+.5))
abline(h=seq(-1,1,.1),col='gray', lty=3)
points(xs, out$coef, ylim=c(-1,1), ylab="Impact of % area cadastered", xlim=c(.5,nrow(out)+.5))
segments(xs, out$coef-1.96*out$cse,xs, out$coef+1.96*out$cse)
abline(h=0, lty=2)
legend("topright", pch=16:18, c("All cad.","Centralized cad.","Decentralized cad."), bg='white')
legend("topleft", pch=c(16,16), col=c(1,"darkgray"), c("control for population","no control for population"), bg='white')

out <- lapply(c("areacadshare","areacadcentshare","areacadlocshare"), twfe, depvar="lcontfoncint",  range=1795:1850, dta=dt, covs="+lpop") %>% rbindlist
xs <- xs+.2
points(xs, out$coef, pch=16:18, col='darkgray')
segments(xs, out$coef-1.96*out$cse,xs, out$coef+1.96*out$cse, col='darkgray')

dev.off()

### FIGURE A6: Cutoff sensitivity ####

cutoffs <- seq(.25,.35,.01)
outs <- list()
outsadj <- list()
for(i in 1:length(cutoffs)){
  dt[,paste("cadcent",cutoffs[i],sep=""):=ifelse(areacadcentshare>cutoffs[i],1,0)]
  cg <- c("notyettreated")
  if(cutoffs[i]>.22){cg<-c("nevertreated","notyettreated")}
  csdidcoefs(outcome="lcontfoncpop", treatvar=paste("cadcent",cutoffs[i],sep=""), d=dt[year<1850,], cg=cg) -> outs[[i]]
  csdidcoefs(outcome="lcontfoncpop", treatvar=paste("cadcent",cutoffs[i],sep=""), d=dt[year<1850,], cg=cg, adj=T) -> outsadj[[i]]
}

lapply(outs, FUN=function(x){ b <- x$att.egt; t <- which(x$egt==0); return(data.table(x$att.egt[t], x$se.egt[t]))}) %>% rbindlist %>% cbind(cutoffs,.) %>% rename(coef=V1, se=V2) -> out
lapply(outsadj, FUN=function(x){ b <- x$att.egt; t <- which(x$egt==0); return(data.table(x$att.egt[t], x$se.egt[t]))}) %>% rbindlist %>% cbind(cutoffs,.) %>% rename(coef=V1, se=V2) -> outadj

pdf("./figures/sensitivity_cutoff_didcad_lcontfoncpop.pdf", height=5)
plot(out$cutoffs, out$coef, ylim=c(-.1,.1), ylab="Estimate t=0", xlab="Cutoff", xlim=c(.25,.355))
segments(out$cutoffs, out$coef+1.96*out$se, out$cutoffs, out$coef-1.96*out$se)
xs <- outadj$cutoffs*1.01
points(xs, outadj$coef, col=2)
segments(xs, outadj$coef+1.96*outadj$se, xs, outadj$coef-1.96*outadj$se, col=2)
abline(h=0, lty=2)
legend("topright", c("No adjustment", "Doubly robust adjustment for covariates"), pch=c(1,1), col=1:2)
dev.off()

### FIGURE A7 Evolution of the land tax by department ####

setkey(dt,"code_dept","year")
d <- dt[year<1850 & code_dept!=82, mean(contfoncint/1000),keyby=c("code_dept","year")] %>% 
  mutate("Dept code"=as.factor(code_dept)) %>% 
  group_by(code_dept) %>% mutate(change=max(ifelse(year==1825,V1/lag(V1,2)-1,-10000))) %>% 
  mutate(`Land tax evolution 1815-1825`=ifelse(change>0,'Increase','Decrease')) %>% as.data.table

ggplot(d, aes(x=year, y=V1, group=`Dept code`, colour=`Land tax evolution 1815-1825`) ) + theme_minimal() + theme(legend.position="bottom") +
    geom_line() + ylab("land tax (thousands francs)")
ggsave("./figures/plotyear_contfonc_alldep.jpg", scale=.35)

setkey(dt,"code_dept","year")
d <- dt[year<1850 & code_dept!=82, mean(contfoncpop),keyby=c("code_dept","year")] %>% 
  mutate("Dept code"=as.factor(code_dept)) %>% 
  group_by(code_dept) %>% mutate(change=max(ifelse(year==1825,V1/lag(V1,2)-1,-10000))) %>% 
  mutate(`Land tax/hab evolution 1815-1825`=ifelse(change>0,'Increase','Decrease')) %>% as.data.table

ggplot(d, aes(x=year, y=V1, group=`Dept code`, colour=`Land tax/hab evolution 1815-1825`) ) + theme_minimal() + theme(legend.position="bottom") +
  geom_line() + ylab("land tax/hab")
ggsave("./figures/plotyear_contfoncpop_alldep.jpg", scale=.35)

### FIGURE A8: HonestDID #####

# follow CS GitHub to make compatible
cg <- c("nevertreated","notyettreated")

out <- csdidcoefs(outcome="lcontfoncpop", treatvar="cadcent0.3", d=dt[year<1850,], cg=cg, adj=T)
ggdid(out)
hd_outcoef <- honest_did(es = out, 
                         e = 0,
                         type="relative_magnitude")
hd_outcoef$robust_ci <- hd_outcoef$robust_ci[-1,]
cs_HDiD_relmag <- createSensitivityPlot_relativeMagnitudes(hd_outcoef$robust_ci,
                                                           hd_outcoef$orig_ci)
cs_HDiD_relmag
ggsave("./figures/honestdid_cadcent03.jpeg")

out <- csdidcoefs(outcome="lcontfoncpop", treatvar="cadcent0.25", d=dt[year<1850,], cg=cg, adj=T)
ggdid(out)
hd_outcoef <- honest_did(es = out, 
                         e = 0,
                         type="relative_magnitude")
hd_outcoef$robust_ci <- hd_outcoef$robust_ci[-1,]
cs_HDiD_relmag <- createSensitivityPlot_relativeMagnitudes(hd_outcoef$robust_ci,
                                                           hd_outcoef$orig_ci)

cs_HDiD_relmag
ggsave("./figures/honestdid_cadcent025.jpeg")

### FIGURE A3: Alternative measures of fiscal capacity #####

load("./data/data_dep.RData")

dt[,highcad:=ifelse(code_dept %in% dt[areacadshare>.30 & year==1821, code_dept],1,0)]
dt[,lowcad:=ifelse(code_dept %in% dt[areacadshare<=.30 & year==1821, code_dept],1,0)]

reds <- brewer.pal(4,"Reds")

### FIGURE A3 Alternative measures:  tax collectors and tax coercion ####

pdf("./figures/plotyear_recev.pdf")

dt[highcad==1,mean(newrecev,na.rm=T), keyby=year] %>% plot(type='l', lty=2, xlim=c(1800,1845), ylab="New tax collector")
dt[lowcad==1,mean(newrecev, na.rm=T), keyby=year] %>% lines
legend('topright', c("% area cadastered > 30","% area cadastered < 30"), lty=2:1)

dev.off()

pdf("./figures/plotyear_taxcoercion.pdf")

dt[highcad==1 ,mean(fraismont,na.rm=T), keyby=year] %>% plot(type='p', lty=2, xlim=c(1800,1845), ylab="Coercion/amount")
dt[lowcad==1,mean(fraismont,na.rm=T), keyby=year] %>% points(pch=2)
legend('topright', c("% area cadastered > 30","% area cadastered < 30"), lty=2:1)

dev.off()


### FIGURE A4 centralized cadaster and tax coercion ####

load("./data/data_dep.RData")

dt <- dt[year<1815,]

#Lower cutoffs since only 1802-1815 period
dt[,cadcent0.12:=ifelse(areacadcentshare>.12,1,0)]
dt[,cadcent0.15:=ifelse(areacadcentshare>.15,1,0)]
dt[,cadcent0.18:=ifelse(areacadcentshare>.18,1,0)]

dt[,lfraismont:=log(fraismont)]

pdf("./figures/cs_taxcoercion.pdf", width=9)

out1 <- csdidcoefs("lfraispop1806","cadcent0.12",dt[year<1815,], cg="notyettreated",adj=T)
out2 <- csdidcoefs("lfraispop1806","cadcent0.15",dt[year<1815,], cg="notyettreated", adj=T)
out3 <- csdidcoefs("lfraispop1806","cadcent0.18",dt[year<1815,], cg="notyettreated", adj=T)

out1 <- csdidcoefs("lfraismont","cadcent0.12",dt[year<1815,], cg="notyettreated",adj=T)
out2 <- csdidcoefs("lfraismont","cadcent0.15",dt[year<1815,], cg="notyettreated", adj=T)
out3 <- csdidcoefs("lfraismont","cadcent0.18",dt[year<1815,], cg="notyettreated", adj=T)

plotcsdidcoefs(out1, col=reds[2], ylim=c(-1,1), leadlag=-5:5)
plotcsdidcoefs(out2, add=T, col=reds[3], leadlag=-5:5)
plotcsdidcoefs(out3, add=T, col=reds[4], leadlag=-5:5)
abline(v=-1, h=0, lty=2)
legend('topright', title="Cutoff:",c("12% share area cadastered", "15%","18%"), col=reds[2:4], lty=c(1,1,1), bg='white')

dev.off()

### FIGURE A5 centralized cadaster and renewal of tax collectors #######

load("./data/data_dep.RData")

csdidcoefs <- function(outcome="lcontfoncint", treatvar,d=dt, cg=c("nevertreated", "notyettreated"), adj=F){
  
  d <- d[!is.na(get(outcome)) & !is.na(get(treatvar)),]
  d[,treat:=min(ifelse(get(treatvar)==1,year,NA),na.rm=T), keyby="code_dept"]
  d[is.infinite(treat) , treat:=0]
  
  if(adj==T){form <- as.formula("~ lshareurban+lrugged+lwheat+ldistparis+ldistpref+ldistroads+rebel+lnbank")}
  if(adj==F){form <- as.formula("~1")}
  
  out <- att_gt(yname=outcome, 
                tname="year",
                idname="code_dept",
                gname="treat",
                control_group = cg,
                clustervars = "code_dept",
                xformla = form,
                base_period = "universal",
                data=d)
  outcoefs <- aggte(out, type="dynamic", min_e = -20, max_e=30, na.rm=T) 
  return(outcoefs)
}

reds <- brewer.pal(6,"Reds")
cutoffs <- seq(.1,.3,.05)
out <- list()
for(i in seq_along(cutoffs)){
  dt[,paste0("cadcent",cutoffs[i]):=ifelse(areacadcentshare>cutoffs[i],1,0)]
  out[[i]] <- csdidcoefs(outcome="newrecev", treatvar=paste0("cadcent",cutoffs[i]),dt, cg=c("notyettreated"), adj=T)
}

pdf("./figures/cs_newrecev.pdf", width=9)

plotcsdidcoefs(out[[1]], col=reds[2], ylim=c(-1,1), leadlag=-5:5)
for(i in 2:5){
  plotcsdidcoefs(out[[i]], add=T, col=reds[1+i], leadlag=-5:5)
}
legend('topright', title="Cutoff:",c("10% share area cadastered", "15%","20%","25%","30%"), col=reds[2:6], lty=c(1,1,1), bg='white')
abline(v=-1,h=0)

dev.off()

pdf("./figures/twfe_newrecev.pdf", width=5, height=5)

out <- lapply(c("areacadshare","areacadcentshare","areacadlocshare"), twfe, depvar="newrecev",  range=1795:1850, dta=dt, covs="+lpop") %>% rbindlist
xs <- 1:nrow(out)
plot(NA, ylim=c(-.8,.5), ylab="Impact of % area cadastered", xlab="", xaxt="n", pch=16:18, main="Renewal of tax collectors", xlim=c(.5,nrow(out)+.5), cex=1.5)
abline(h=seq(-1,1,.1),col='gray', lty=3)
points(xs, out$coef, ylim=c(-1,1), ylab="Impact of % area cadastered", xlab="", xaxt="n", pch=16:18, xlim=c(.5,nrow(out)+.5))
segments(xs, out$coef-1.96*out$cse,xs, out$coef+1.96*out$cse)
abline(h=0, lty=2)
legend("topright", pch=16:18, c("All cad.","Centralized cad.","Decentralized cad."), bg='white')

dev.off()


